Update: 2015-03-22 Purpose:
讨论常见图形的命令、参数和效果,尽量用R基础功能实现,效果不输excel
Data Used: iris, VADeaths
Packages Used: MASS,RColorBrewer
# base barplot
barplot( table(iris[,5]) )
plot( table(iris[,5]), type = "h")
more bars
#统计汇总条形图
l <- aggregate( Sepal.Length ~ Species, data= iris, mean)
x <- barplot( l[,2] , col=terrain.colors(3) , xlim=c(0,5), ylim=c(0,8) ,axe=F, names.arg = l[,1] )
y <- as.matrix( l[,2] )
text( x, y+1, labels=l[,2], col="black" ) #柱顶标注,y +n调节标注高度,横放图则调x+n
legend(legend=l[,1], "right", pch=15, col=terrain.colors(3) )
grouped bars
# 堆砌和分组
l <- table( mtcars$cyl , mtcars$gear);l
##
## 3 4 5
## 4 1 8 2
## 6 2 4 1
## 8 12 0 2
barplot( l, beside=T, main = "car tpyes: cyl and gear",
names.arg=c( "gear=3","gear=4","gear=5"), legend=c("cyl=4", "cly=6", "cyl=8"))
barplot( l, beside=F, main = "car tpyes: cyl and gear",
names.arg=c( "gear=3","gear=4","gear=5"), legend=c("cyl=4", "cly=6", "cyl=8"))
# Pie Chart with Percentages
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct) # add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
main="Pie Chart of Countries")
# Dotplot: Grouped Sorted and Colored
# Sort by mpg, group and color by cylinder
x <- mtcars[order(mtcars$mpg),] # sort by mpg
x$cyl <- factor(x$cyl) # it must be a factor
x$color[x$cyl==4] <- "red"
x$color[x$cyl==6] <- "blue"
x$color[x$cyl==8] <- "darkgreen"
dotchart(x$mpg,labels=row.names(x),cex=.7,groups= x$cyl,
main="Gas Milage for Car Models\ngrouped by cylinder",
xlab="Miles Per Gallon", gcolor="black", color=x$color)
x <- mtcars$mpg
h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon", prob=F ,
main="Histogram with Normal Curve")
xfit<-seq(min(x),max(x),length=40)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x)) # 正态分布可以这么画,其他分布参考分布函数
yfit <- yfit*diff(h$mids[1:2])*length(x)
lines(xfit, yfit, col="blue", lwd=2)
# 累计分布函数
plot(ecdf(x),verticals = TRUE, do.p = FALSE)
lines(x, pnorm(x, mean(x), sd(x)), col = "red")
# QQ图
qqnorm(x); qqline(x, col = "red")
# 密度函数
h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon", prob=T ,
main="Histogram with Normal Curve")
lines(density(x))
# 离散变量直方图
plot( table(mtcars$carb), type="h" , lwd=5 )
boxplot( Petal.Length ~ Species , data = iris , outline = F ) # 离群值不显示
boxplot( mpg ~ cyl * gear, data = mtcars , varwidth=T ) # 箱体宽度由样本量决定
# 简单漂亮的散点图:两个连续变量之间的关系
plot(cars$dist~cars$speed, # y~x
main="Relationship between car distance & speed", #Plot Title
xlab="Speed (miles per hour)", #X axis title
ylab="Distance travelled (miles)", #Y axis title
xlim=c(0,30),
#Set x axis limits from 0 to 30 ylim=c(0,140),
#Set y axis limits from 0 to 30140 xaxs="i",
#Set x axis style as internal
yaxs="i", #Set y axis style as internal
col="red", #Set the colour of plotting symbol to red
pch=19) #Set the plotting symbol to filled dots
# 分类散点图
# 先将鸢尾花的类型转化为整数1 、2 、3,便于使用向量
idx = as.integer(iris[["Species"]])
plot(iris[, 3:4], pch = c(24, 21, 25)[idx], col = c("black","red", "blue")[idx], panel.first = grid())
legend("topleft", legend = levels(iris[["Species"]]), col = c("black", "red", "blue"),
pch = c(24, 21, 25), bty = "n")
# 散点矩阵图
pairs( ~ Sepal.Length + Sepal.Width + Petal.Length +Petal.Width, data=iris)
## matplot 分类散点图,强化类别差异的表现
nam.var <- colnames(iris)[-5]
nam.spec <- as.character(iris[1+50*0:2, "Species"])
iris.S <- array(NA, dim = c(50,4,3),
dimnames = list(NULL, nam.var, nam.spec))
for(i in 1:3) iris.S[,,i] <- data.matrix(iris[1:50+50*(i-1), -5])
matplot(iris.S[, "Petal.Length",], iris.S[, "Petal.Width",], pch = "SCV",
col = rainbow(3, start = 0.8, end = 0.1),
sub = paste(c("S", "C", "V"), dimnames(iris.S)[[3]],
sep = "=", collapse= ", "),
main = "Fisher's Iris Data")
data <- VADeaths
plot(data[,1] ,type="b" , lwd =2, xaxt="n" , ylim=c(0, 75) ,
main ="Death Rates in Virginia (1940)", xlab="age", ylab="Death Rate" )
axis( 1, at=1:nrow(data) ,labels = row.names(data) )
lines( data[,2], type="b" , lwd =2, col="red" )
lines( data[,3], type="b" , lwd =2, col="orange" )
lines( data[,4], type="b" , lwd =2, col="purple" )
legend( 4,30 , legend=colnames(data) , lty=1, lwd=2, pch=21, bty="n" ,
col=c("black","red","orange","purple") , cex =0.8, inset=0.01)
grid()
library(MASS)
cal<-corresp(USPersonalExpenditure,nf=2) ;
## Warning: negative or non-integer entries in table
biplot(cal,expand=1.5, xlim=c(-0.5 , 0.5), ylim=c(-0.1 , 0.15))
abline(v=0,h=0,lty=3)
pca <- princomp( iris[,-5], cor = T)
summary(pca, loadings=T, cutoff= 0.01)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 1.7084 0.9560 0.38309 0.143926
## Proportion of Variance 0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4
## Sepal.Length 0.521 -0.377 0.720 0.261
## Sepal.Width -0.269 -0.923 -0.244 -0.124
## Petal.Length 0.580 -0.024 -0.142 -0.801
## Petal.Width 0.565 -0.067 -0.634 0.524
screeplot( pca , type="lines" )
load <- loadings(pca)
plot(load[,1:2] ); text( load[,1], load[,2], adj=c(0.01,0.01))
dist <-dist(scale(iris[,c(1:4)]))
hc <- hclust(dist, "ward")
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"
plclust(hc,hang=-1 , labels=iris[,5] )
## Warning: 'plclust' is deprecated.
## Use 'plot' instead.
## See help("Deprecated")
re<-rect.hclust(hc,k=4,border="red")
par(mar = c(0.2, 0.2, 0.2, 0.2), mfrow = c(2, 2))
for (n in c(63, 60, 76, 74)) {
set.seed(711)
plot.new()
size = c(replicate(n, 1/rbeta(2, 1.5, 4)))
center = t(replicate(n, runif(2)))
center = center[rep(1:n, each = 2), ]
color = apply(replicate(2 * n, sample(c(0:9,
LETTERS[1:6]), 8, TRUE)), 2, function(x) sprintf("#%s", paste(x, collapse = "")))
points(center, cex = size, pch = rep(20:21, n), col = color)
box()
}
col1 = "red";col2="blue";col3="green"
par(bg="grey25");#背景
plot(x=c(1),y=c(2.5),xlim=c(0,5),col="gray27",ylim=c(0,10),axes=F,ann=F,pch=20);#坐标
#画线
lines(c(1,1),c(2.5,8),col=col1,lwd=5);
lines(c(2,2),c(2.5,8),col=col1,lwd=5);
lines(c(2,2),c(0,2.5),col=col2,lwd=5);
lines(c(3,3),c(2,4.7),col=col2,lwd=5);
lines(c(3,3),c(4.7,6),col=col3,lwd=5);
lines(c(3,3),c(6,8),col=col1,lwd=5);
lines(c(4,4),c(1,3.5),col=col2,lwd=5);
lines(c(4,4),c(3.5,4.8),col=col3,lwd=5);
lines(c(4,4),c(4.8,6.2),col=col1,lwd=5);
lines(c(4,4),c(6.2,7.2),col=col2,lwd=5);
lines(c(4,4),c(7.2,8),col=col1,lwd=5);
#画矩阵
polygon(c(1,2,2,1),c(2.5,2.5,8,8),col=col1,border=col1,density=c(100));
polygon(c(2,3,3,2),c(6,6,8,8),col=col1,border=col1,density=c(100));
polygon(c(3,4,4,3),c(6,4.8,6.2,7.2),col=col1,border=col1,density=c(100));
polygon(c(3,4,4,3),c(7.2,7.2,8,8),col=col1,border=col1,density=c(100));
polygon(c(2,3,3,2),c(0,2,4.7,2.5),col=col1,border=col1,density=c(100));
polygon(c(3,4,4,3),c(2,1,3.5,4.7),col=col2,border=col2,density=c(100));
polygon(c(3,4,4,3),c(2,1,3.5,4.7),col=col2,border=col2,density=c(100));
polygon(c(3,4,4,3),c(4.7,3.5,4.8,6),col=col3,border=col3,density=c(100));
#文字
text(c(1:4),c(8.5),labels=c(paste("version",1:4,sep="")),col="white");
text( 0.5 ,c(1.0,0.5,0),labels=c("mary","suzanne","martin"),col=c(col1,col2,col3));
library(RColorBrewer)
data <- VADeaths
pal=brewer.pal(4,"YlOrRd")
breaks<-c(0, 15, 26, 44, 72)
layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(8,1),
heights=c(1,1)) ## 画一个空白的图形画板,按照参数把图形区域分隔好
# 看layout的分割可以这样:
# xx <- layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(8,1), heights=c(1,1)) ; layout.show(xx)
par(mar = c(2,6,4,1 ), oma=c(0.1, 0.1 ,0.1 , 0.1), mex = 1.2 ) #Set margins for the heatmap
image(x=1:nrow(data),
y=1:ncol(data),
z=data,axes=FALSE,
xlab="Month", ylab="", main="Sales Heat Map" ,
col=pal[1:(length(breaks)-1)],
breaks=breaks ) # breaks 颜色块对应的数值(数值分组),要比颜色数量多1个
axis(1, col="white",las=1 , at=1:nrow(data), labels=rownames(data) )
axis(2, col="white",las=1 , at=1:ncol(data), labels=colnames(data) )
abline(h=c(1:ncol(data))+0.5, v=c(1:nrow(data))+0.5, col="white",lwd=2,xpd=FALSE)
# 画标尺
breaks2 <- breaks[-length(breaks)] # breaks 少一个
par(mar = c(2,1,4,2))
image(x = 1, y= 0:length(breaks2),
z=t(matrix(breaks2))*1.001,
col=pal[1:length(breaks)-1],
axes=FALSE,breaks=breaks,
xlab="", ylab="",xaxt="n")
axis(4,at=0:(length(breaks2)-1), labels=breaks2, col="white", las=1)
abline(h=c(1:length(breaks2)),col="white",lwd=2, xpd=F )
y1 <- function(x) {
(2*x+sqrt(5-21*x^2))/5
}
y2 <- function(x) {
(2*x-sqrt(5-21*x^2))/5
}
low <- -sqrt(5/21)
high <- sqrt(5/21)
curve(y1, low, high, ylim=c(low, high))
curve(y2, low, high, add=T)
x <- c(1:5); y <- x # create some data
par(pch=22, col="red") # plotting symbol and color
par(mfrow=c(2,4)) # all plots on one page
opts = c("p","l","o","b","c","s","S","h")
for(i in 1:length(opts)){
heading = paste("type=",opts[i])
plot(x, y, type="n", main=heading)
lines(x, y, type=opts[i])
}
library( RColorBrewer)
display.brewer.all(n=10, exact.n=FALSE) # 调色板
col= brewer.pal( n ,"Set1") # 引用颜色
## Warning: n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
x <- c(1:10); y <- x; z <- 10/x
# 为一个坐标在右边创建额外页边空间
par(mar=c(5, 4, 4, 8) + 0.1)
# 绘制 x 相对 y
plot(x, y,type="b", pch=21, col="red",
yaxt="n", lty=3, xlab="", ylab="")
# 添加 x 相对 1/x
lines(x, z, type="b", pch=22, col="blue", lty=2)
# 在左边画一条轴线
axis(2, at=x,labels=x, col.axis="red", las=0)
# 用小字体标记在右边画一条轴线
axis(4, at=z,labels=round(z,digits=2),
col.axis="blue", las=2, cex.axis=0.7, tck=-.01)
# 为右边轴线添加标题
mtext("y=1/x", side=4, line=3, cex.lab=1,las=2, col="blue")
# 添加主标题,底和左轴标签
title("An Example of Creative Axes", xlab="X values",
ylab="Y=X")
plot(1:10, type = "n", xlim = c(0, 20), ylim = c(0, 20)) # 不作图,只画出框架,且指定坐标轴范围
lines(1:10, abs(rnorm(10))) # 10个正态随机数绝对值的波动线
# 不同的直线
abline(a = 0, b = 1, col = "gray")
abline(v = 2, lty = 2)
abline(h = 2, lty = 2)
#添加文本
text(8, 3, "abline(a = 0, b = 1)")
# 添加箭头
arrows(8, 3.5, 6, 5.7, angle = 40)
# 参数用了向量:不同灰度的线段
segments(rep(3, 4), 6:9, rep(5, 4), 6:9, col = gray(seq(0.2, 0.8, length = 4)))
text(4, 9.8, "segments")